######################################################################################
######################################################################################
#Replication code for:

#Mark T. Buntaine, Ryan Jablonski, Daniel Nielson, Paula Pickering
#SMS Texts on Corruption Help Ugandan Voters Hold Elected Councillors Accountable at the Polls

#Prepared by: Mark Buntaine & Ryan Jablonski
#Mark Buntaine contact (as of April 2018): buntaine@bren.ucsb.edu
#Ryan Jablonski contact (as of April 2018): r.s.jablonski@lse.ac.uk

#Compiled using R Version 3.4.3 (version "Kite-Eating Tree") on Mac running OS X 10.13.4
######################################################################################
######################################################################################

######################################################################################
###Packages
######################################################################################
library(lfe) #Version 2.6-2291
library(ggplot2) #Version 2.2.1
#Note: on warnings raised by lfe (felm), see: https://cran.r-project.org/web/packages/lfe/vignettes/identification.pdf
library(stargazer) #Version 5.2.1
library(dplyr) #Version 0.7.4
library(cobalt) #Version 3.2.2
library(gridExtra) #Version 2.3


######################################################################################
###Functions
######################################################################################
#Randomization inference for the first right-hand-side coefficient of a felm() fit
#
#Arguments:
#  formula            model formula passed to felm(); only coefficient 1 is kept, so
#                     treat.var should be the first right-side entry
#  dta                data frame the model is fitted on
#  treat.var          name of the column of dta that is overwritten with each
#                     alternative assignment
#  rand.ob            data frame of alternative assignments; draw i is read from
#                     column rand.ob.info.cols+i
#  rand.ob.info.cols  number of columns of rand.ob that precede the first draw
#  join.var           name of the id column present in both dta and rand.ob
#  sims               number of draws to use (a single number >= 1)
#  ...                accepted but not used
#
#Returns a list: ate (observed coefficient), ate.samp.dist (coefficient under each
#draw), se (sd of ate.samp.dist), p.two.way / p.one.way.greater / p.one.way.lesser
#(share of draws strictly more extreme than ate) and N (the $N of the observed fit)
felm.ri <- function(formula, dta, treat.var, rand.ob, rand.ob.info.cols, join.var, sims, ...){
  #library() stops with an error when lfe is missing; require() would only return FALSE
  library(lfe)
  print("1. data and rand.ob must have rows organized in identical order")
  print("2. treat.var should be first right-side entry in formula")
  print("3. join.var must have identical name between data and rand.ob")

  if (!is.numeric(sims) || length(sims) != 1L || is.na(sims) || sims < 1) {
    stop("sims must be a single number >= 1", call. = FALSE)
  }

  #Fit the observed model once and reuse it for both the estimate and N
  fit <- felm(formula, data=dta)
  ate <- coef(fit)[1]
  N <- fit$N
  ate.samp.dist <- rep(NA_real_, sims)

  #Rows of rand.ob whose id occurs in dta; this does not change between draws.
  #Rows are picked by membership, not matched by id, hence the ordering requirement above
  keep <- rand.ob[,join.var] %in% dta[,join.var]

  for (i in seq_len(sims)){
    dta[,treat.var] <- rand.ob[keep, rand.ob.info.cols+i]
    ate.samp.dist[i] <- coef(felm(formula, data=dta))[1]
  }

  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)

  list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
}

#Randomization inference for a multi-level (factor) treatment in a felm() fit
#
#The number of coefficients kept is the number of distinct non-missing values of
#dta[,treat.var] minus one; they are taken from the front of coef(), so treat.var
#should be the first right-side entry of the formula.
#
#Arguments:
#  formula            model formula passed to felm()
#  dta                data frame the model is fitted on
#  treat.var          name of the column of dta that is overwritten with each
#                     alternative assignment
#  rand.ob            data frame of alternative assignments; draw i is read from
#                     column rand.ob.info.cols+i
#  rand.ob.info.cols  number of columns of rand.ob that precede the first draw
#  join.var           name of the id column present in both dta and rand.ob
#  sims               number of draws to use (a single number >= 1)
#  ...                accepted but not used
#
#Returns a list: coef (observed coefficients), coef.samp.dist (one row per kept
#coefficient, one column per draw), se (row-wise sd of coef.samp.dist),
#p.two.way / p.one.way.greater / p.one.way.lesser (per coefficient, share of draws
#strictly more extreme than the observed value) and N (the $N of the observed fit)
felm.ri2 <- function(formula, dta, treat.var, rand.ob, rand.ob.info.cols, join.var, sims, ...){
  #library() stops with an error when lfe is missing; require() would only return FALSE
  library(lfe)
  print("1. data and rand.ob must have rows organized in identical order")
  print("2. treat.var should be first right-side entry in formula")
  print("3. join.var must have identical name between data and rand.ob")

  if (!is.numeric(sims) || length(sims) != 1L || is.na(sims) || sims < 1) {
    stop("sims must be a single number >= 1", call. = FALSE)
  }

  #Note: have to make this for multiple factor of one randomized treatment

  ran.coef.num <- length(unique(dta[,treat.var][!is.na(dta[,treat.var])]))-1 #Gives number of coefficients to keep for crossed treatment indicator
  if (ran.coef.num < 1) {
    #1:ran.coef.num would otherwise silently become c(1,0)
    stop("treat.var must take at least two distinct non-missing values", call. = FALSE)
  }

  #Fit the observed model once and reuse it for both the estimates and N
  fit <- felm(formula, data=dta)
  N <- fit$N
  obs.coef <- coef(fit)[1:ran.coef.num] #named obs.coef so the coef() function is not shadowed
  coef.samp.dist <- matrix(data=NA, nrow=ran.coef.num, ncol=sims)

  #Rows of rand.ob whose id occurs in dta; this does not change between draws
  keep <- rand.ob[,join.var] %in% dta[,join.var]

  for (i in seq_len(sims)){
    dta[,treat.var] <- rand.ob[keep, rand.ob.info.cols+i]
    coef.samp.dist[,i] <- coef(felm(formula, data=dta))[1:ran.coef.num]
  }

  se <- apply(coef.samp.dist, 1, sd) #Cannot get SEs off non-randomized parameters

  #obs.coef has one entry per matrix row, so it recycles down each column and every
  #row is compared with its own observed coefficient
  p.two.way <- rowSums(abs(coef.samp.dist) > abs(obs.coef))/sims
  p.one.way.greater <- rowSums(coef.samp.dist > obs.coef)/sims
  p.one.way.lesser <- rowSums(coef.samp.dist < obs.coef)/sims

  list("coef" = obs.coef, "coef.samp.dist" = coef.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
}

#Randomization inference for the first right-hand-side coefficient of a weighted
#felm() fit
#
#The same weights are used for the observed fit and for every re-fit under an
#alternative assignment, so the observed estimate is compared with a reference
#distribution produced by the same estimator.
#
#Arguments:
#  formula            model formula passed to felm(); only coefficient 1 is kept, so
#                     treat.var should be the first right-side entry
#  dta                data frame the model is fitted on
#  treat.var          name of the column of dta that is overwritten with each
#                     alternative assignment
#  rand.ob            data frame of alternative assignments; draw i is read from
#                     column rand.ob.info.cols+i
#  rand.ob.info.cols  number of columns of rand.ob that precede the first draw
#  join.var           name of the id column present in both dta and rand.ob
#  sims               number of draws to use (a single number >= 1)
#  weights            passed unchanged as the weights argument of felm()
#  ...                accepted but not used
#
#Returns a list: ate (observed coefficient), ate.samp.dist (coefficient under each
#draw), se (sd of ate.samp.dist), p.two.way / p.one.way.greater / p.one.way.lesser
#(share of draws strictly more extreme than ate) and N (the $N of the observed fit)
felm.ri3 <- function(formula, dta, treat.var, rand.ob, rand.ob.info.cols, join.var, sims, weights, ...){
  #library() stops with an error when lfe is missing; require() would only return FALSE
  library(lfe)
  print("1. data and rand.ob must have rows organized in identical order")
  print("2. treat.var should be first right-side entry in formula")
  print("3. join.var must have identical name between data and rand.ob")

  if (!is.numeric(sims) || length(sims) != 1L || is.na(sims) || sims < 1) {
    stop("sims must be a single number >= 1", call. = FALSE)
  }

  #Fit the observed weighted model once; N is read from this same fit
  fit <- felm(formula, data=dta, weights = weights)
  ate <- coef(fit)[1]
  N <- fit$N
  ate.samp.dist <- rep(NA_real_, sims)

  #Rows of rand.ob whose id occurs in dta; this does not change between draws
  keep <- rand.ob[,join.var] %in% dta[,join.var]

  for (i in seq_len(sims)){
    dta[,treat.var] <- rand.ob[keep, rand.ob.info.cols+i]

    #Weighted, exactly like the observed fit above
    ate.samp.dist[i] <- coef(felm(formula, data=dta, weights = weights))[1]
  }

  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se <- sd(ate.samp.dist)

  list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se"=se, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
}


#Draw several ggplot objects on one page
#from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
#
#Arguments:
#  ...       plot objects to draw
#  plotlist  optional list of further plot objects, appended after those given in ...
#  file      not referenced in the function body
#  cols      number of columns of the default layout
#  layout    optional matrix whose entries are plot indices; plot k is drawn in the
#            cell(s) holding the value k. When supplied, cols is not used
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  all.plots <- c(list(...), plotlist)
  plot.count <- length(all.plots)

  #Default layout: number the cells of a cols-wide grid, with as many rows as
  #are needed to hold every plot
  if (is.null(layout)) {
    rows.needed <- ceiling(plot.count/cols)
    layout <- matrix(seq(1, cols * rows.needed), ncol = cols, nrow = rows.needed)
  }

  if (plot.count != 1) {
    #Start a fresh page split into the layout's grid
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    for (k in 1:plot.count) {
      #Row/column positions of every layout cell assigned to plot k
      cell <- which(layout == k, arr.ind = TRUE)

      print(all.plots[[k]], vp = viewport(layout.pos.row = unname(cell[, "row"]),
                                          layout.pos.col = unname(cell[, "col"])))
    }
  } else {
    #A single plot needs no grid
    print(all.plots[[1]])
  }
}

#Return the most frequent value of x as a single element
#When several values are equally frequent, the one that appears first in x is returned
SingleMode <- function(x) {
  distinct.values <- unique(x)
  occurrences <- tabulate(match(x, distinct.values))
  distinct.values[which.max(occurrences)]
}


######################################################################################
###Data input
######################################################################################

#Point the working directory at the folder holding the replication materials (edit as needed)
#setwd("D:/google drive/Uganda Vote Choice project/Analysis/Replication/Replication_PNAS/")
setwd("~/Google Drive/Uganda Vote Choice project/Analysis/Replication/Replication_PNAS/")

#Main analysis file
data <- read.csv("./data/MASTER_Analysis_Anon_180424.csv", stringsAsFactors=FALSE)

#Budget_RI, with rows put in the order of data$id.cleaned
Budget_RI <- local({
  tbl <- read.csv("./data/Budget_RI.csv", stringsAsFactors=FALSE)
  tbl[match(data$id.cleaned, tbl$id.cleaned),]
})

#Density_RI, with rows put in the order of data$id.cleaned
Density_RI <- local({
  tbl <- read.csv("./data/Density_RI.csv", stringsAsFactors=FALSE)
  tbl[match(data$id.cleaned, tbl$id.cleaned),]
})

#Audit data for all districts
budget_audit <- read.csv("./data/lc5_budget_and_audit.csv", stringsAsFactors = FALSE)

#Electoral results for all lc5 councillors
candidates <- read.csv("./data/lc5_candidates.csv", stringsAsFactors = FALSE)

#Comparison of survey and official voting
votes.councillor <- read.csv("./data/councilor_official_vs_survey_votes.csv", stringsAsFactors = FALSE)
votes.chair <- read.csv("./data/chair_official_vs_survey_votes.csv", stringsAsFactors = FALSE)


######################################################################################
###Data setup
######################################################################################
#Schooling as a factor whose levels follow the order listed here
schooling.levels <- c(
  "no_schooling", "some_primary_s", "completed_prim", "some_secondary",
  "completed_seco", "some_universit", "completed_univ", "some_post_grad",
  "completed_mast", "refuse_to_answ"
)
data$r.education <- factor(data$r.How_much_schooling_have_you_co, levels = schooling.levels)

#Rebuild "bd.multi.treat2" as text ("<budget.treat>.<density.treat2>") because read.csv()
#converted its class; rows where the pair is not one of the four 0/1 combinations stay NA
data$bd.multi.treat2 <- with(data,
  ifelse(budget.treat == 1 & density.treat2 == 1, "1.1",
  ifelse(budget.treat == 1 & density.treat2 == 0, "1.0",
  ifelse(budget.treat == 0 & density.treat2 == 1, "0.1",
  ifelse(budget.treat == 0 & density.treat2 == 0, "0.0", NA)))))

data$partyID.slim <- factor(
  data$partyID.slim,
  levels = c("nrm", "opposition", "independent")
)

#Copies of the alignment variables in which NA is replaced by the text "missing"
data$aligned.lc5.chair.inc.noNA <- with(data, ifelse(is.na(aligned.lc5.chair.inc), "missing", aligned.lc5.chair.inc))
data$aligned.lc5.councillor.inc.noNA <- with(data, ifelse(is.na(aligned.lc5.councillor.inc), "missing", aligned.lc5.councillor.inc))
data$aligned.lc3.chair.inc.noNA <- with(data, ifelse(is.na(aligned.lc3.chair.inc), "missing", aligned.lc3.chair.inc))
data$aligned.lc3.councillor.inc.noNA <- with(data, ifelse(is.na(aligned.lc3.councillor.inc), "missing", aligned.lc3.councillor.inc))

#Fixed effect weights: one value where density.treat2 is missing, another elsewhere
data$fe.w <- ifelse(is.na(data$density.treat2), 1/(.5*.5), 1/(.8*.2))

#Priors, audit results and posteriors as labelled factors
#(labels are matched to the sorted distinct values found in each column)
data$f.budget.prior <- factor(
  data$budget.prior,
  labels = c("Much Worse (1)", "A Little Worse (2)", "Don't Know (3)", "Better (4)", "Much Better (5)")
)
data$f.budget.actual <- factor(
  data$budget.actual,
  labels = c("Much Worse (1)", "A Little Worse (2)", "Better (4)", "Much Better (5)")
)
data$f.budget.post <- factor(
  data$budget.post,
  labels = c("Much Worse (1)", "A Little Worse (2)", "Don't Know (3)", "Better (4)", "Much Better (5)")
)


######################################################################################
###Budget subsets
######################################################################################

#Row filters reused below; subset() keeps only rows where the condition is TRUE
.keep.chair.comp <- function(d) {
  subset(d, lc5.chair.competitive==1 & lc5.chair.party.switch==0)
}
.keep.councillor.comp <- function(d) {
  subset(d, lc5.councillor.competitive==1 & lc5.councillor.party.switch==0 & is.na(lc5.councillor.redistricted2016))
}
.keep.verified <- function(d) {
  subset(d, d.X_11_Over_the_last_several_days=="yes")
}

#Subgroups defined relative to priors: the audit score is above (below) the prior,
#or equals it at a score of 4 or more (2 or less)
budget.good <- subset(data, budget.actual > budget.prior | (budget.actual==budget.prior & budget.actual>=4))
budget.bad <- subset(data, budget.actual < budget.prior | (budget.actual==budget.prior & budget.actual<=2))

#Subgroups defined by the audit score alone, for an extended analysis
budget.positive <- subset(data, budget.actual>=4)
budget.negative <- subset(data, budget.actual<=2)

#Main subset: no individual incumbent switched parties and ran again in 2016 and no redistricting (includes elections w/o incumbent individual)
budget.good.lc5.chair.comp <- .keep.chair.comp(budget.good)
budget.good.lc5.councillor.comp <- .keep.councillor.comp(budget.good)
budget.bad.lc5.chair.comp <- .keep.chair.comp(budget.bad)
budget.bad.lc5.councillor.comp <- .keep.councillor.comp(budget.bad)
budget.positive.lc5.chair.comp <- .keep.chair.comp(budget.positive)
budget.positive.lc5.councillor.comp <- .keep.councillor.comp(budget.positive)
budget.negative.lc5.chair.comp <- .keep.chair.comp(budget.negative)
budget.negative.lc5.councillor.comp <- .keep.councillor.comp(budget.negative)

#Verified Recipient subgroups: the main subsets restricted to rows answering "yes"
#to d.X_11_Over_the_last_several_days
budget.good.lc5.chair.comp.c <- .keep.verified(.keep.chair.comp(budget.good))
budget.good.lc5.councillor.comp.c <- .keep.verified(.keep.councillor.comp(budget.good))
budget.bad.lc5.chair.comp.c <- .keep.verified(.keep.chair.comp(budget.bad))
budget.bad.lc5.councillor.comp.c <- .keep.verified(.keep.councillor.comp(budget.bad))


######################################################################################
###Figure S2: Map of sampled villages in Uganda
#Excluded for reasons of anonymity. Study sites and map replication can be requested from the authors. 
######################################################################################


######################################################################################
###Figure S3: CONSORT diagram tracking study design
######################################################################################

#Builds a one-row data frame ("consort") whose columns hold the counts shown in the
#CONSORT diagram, then prints every count as "name: value"
consort.data=data
#data.frame(NA) gives one row with a single placeholder column; the printing loop at
#the end of this section starts at column 2 and so skips it
consort=data.frame(NA)
consort.data$c=1
#Number of rows of "data" per location.id, copied back onto every row
num.in.village=aggregate(consort.data$c, by=list(consort.data$location.id), FUN=sum)
consort.data$num.in.village=num.in.village[match(consort.data$location.id,num.in.village$Group.1), "x"]


#Row 1 Column 1
consort$assessed_for_eligibility=30296 #from baseline recruitment files available on request

#Row 1 Column 2  
consort$excluded=14213 #from baseline recruitment files available on request
consort$excluded.refused=218 #from baseline recruitment files available on request
consort$excluded.unreachable=13995 #from baseline recruitment files available on request

#Row 2 Column 1
consort$randomized=nrow(consort.data)

#Row 3 Column 1
#The three treatment cells are split by density.treat and by whether the village has
#at least 15 rows in "data"
#NOTE(review): the 80/50/20 suffixes presumably name the village-level treatment shares - confirm
consort$treatment=sum(consort.data$budget.treat==1)
consort$treatment80=sum(consort.data$density.treat==1 & consort.data$budget.treat==1 & consort.data$num.in.village>=15)
consort$treatment50=sum(consort.data$density.treat==0 & consort.data$budget.treat==1 & consort.data$num.in.village<15)
consort$treatment20=sum(consort.data$density.treat==0 & consort.data$budget.treat==1 & consort.data$num.in.village>=15)
#Consistency check only: the result is not stored or enforced, it is merely shown when
#the expression is auto-printed at top level
consort$treatment==consort$treatment80+consort$treatment50+consort$treatment20

#Row 3 Column 2
consort$control=sum(consort.data$budget.treat==0)
consort$control80=sum(consort.data$density.treat==1 & consort.data$budget.treat==0 & consort.data$num.in.village>=15)
consort$control50=sum(consort.data$density.treat==0 & consort.data$budget.treat==0 & consort.data$num.in.village<15)
consort$control20=sum(consort.data$density.treat==0 & consort.data$budget.treat==0 & consort.data$num.in.village>=15)
#Consistency check only (see the treatment check above)
consort$control==consort$control80+consort$control50+consort$control20

#Row 4 Column 1
#Surveyed at endline = rows with d.attr==0; unreachable is the remainder of the arm
consort$treat_surveyed_endline = sum(consort.data$d.attr==0 & consort.data$budget.treat==1)
consort$treat_unreachable_endline = consort$treatment-consort$treat_surveyed_endline

#Row 4 Column 2
consort$control_surveyed_endline = sum(consort.data$d.attr==0 & consort.data$budget.treat==0)
consort$control_unreachable_endline = consort$control-consort$control_surveyed_endline

#Row 5 Column 1

#analyzed
#Treated rows of the bad-news and good-news main subsets with non-missing vote, treatment and intent
consort$treat_analyzed_chair=sum(budget.bad.lc5.chair.comp$budget.treat==1 & !is.na(budget.bad.lc5.chair.comp$lc5.chair.inc.vote) & !is.na(budget.bad.lc5.chair.comp$budget.treat) & !is.na(budget.bad.lc5.chair.comp$lc5.chair.intent))+
  sum(budget.good.lc5.chair.comp$budget.treat==1 & !is.na(budget.good.lc5.chair.comp$lc5.chair.inc.vote) & !is.na(budget.good.lc5.chair.comp$budget.treat) & !is.na(budget.good.lc5.chair.comp$lc5.chair.intent))
consort$treat_analyzed_councillor=sum(budget.bad.lc5.councillor.comp$budget.treat==1 & !is.na(budget.bad.lc5.councillor.comp$lc5.councillor.inc.vote) & !is.na(budget.bad.lc5.councillor.comp$budget.treat) & !is.na(budget.bad.lc5.councillor.comp$lc5.councillor.intent))+
  sum(budget.good.lc5.councillor.comp$budget.treat==1 & !is.na(budget.good.lc5.councillor.comp$lc5.councillor.inc.vote) & !is.na(budget.good.lc5.councillor.comp$budget.treat) & !is.na(budget.good.lc5.councillor.comp$lc5.councillor.intent) )

#reasons for exclusion
#Each count is restricted to treated rows surveyed at endline (d.attr==0)
#NOTE(review): sum() is called without na.rm, so any NA in a condition makes that count NA;
#only the councillor-uncompetitive line treats NA explicitly - confirm this is intended
consort$treat_refused_voting=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & (!(consort.data$d.vote_in_LC5_election.x %in% c("no","yes")) & !is.na(consort.data$d.vote_in_LC5_election.x)))
consort$treat_refused_vote_choice_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & ((consort.data$d.party_of_LC5_chairperson_voted.x %in% c("refused_to_answer")) ))
consort$treat_refused_vote_choice_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & ((consort.data$d.party_of_LC5_councillor_voted.x %in% c("refused_to_answer")) ))
consort$treat_refused_vote_intent_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & ((consort.data$b.Q6_vote_for_current_LC5chair %in% c("refused_to_ans")) ))
#NOTE(review): the councillor intent count reads a column named "...LC3council" - confirm it is the intended LC5 item
consort$treat_refused_vote_intent_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & ((consort.data$b.Q7_vote_for_current_LC3council %in% c("refused_to_ans")) ))
consort$treat_prior_nonresponsive=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & ((consort.data$b.Q22_LC5_record_of_budget_mgmt %in% c("", NA)) ))
consort$treat_chair_uncompetitive=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & consort.data$lc5.chair.competitive==0)
consort$treat_councillor_uncompetitive=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & (consort.data$lc5.councillor.competitive==FALSE | is.na(consort.data$lc5.councillor.competitive)))
consort$treat_changed_parties_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & consort.data$lc5.chair.party.switch==1)
consort$treat_changed_parties_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & consort.data$lc5.councillor.party.switch==1)
consort$treat_did_not_vote=sum(consort.data$d.attr==0 & consort.data$budget.treat==1 & (consort.data$d.vote_in_LC5_election.x=="no" & !is.na(consort.data$d.vote_in_LC5_election.x)))

#Row 5 Column 2

#analyzed
#Control rows of the bad-news and good-news main subsets with non-missing vote, treatment and intent
consort$control_analyzed_chair=sum(budget.bad.lc5.chair.comp$budget.treat==0 & !is.na(budget.bad.lc5.chair.comp$lc5.chair.inc.vote) & !is.na(budget.bad.lc5.chair.comp$budget.treat) & !is.na(budget.bad.lc5.chair.comp$lc5.chair.intent))+
  sum(budget.good.lc5.chair.comp$budget.treat==0 & !is.na(budget.good.lc5.chair.comp$lc5.chair.inc.vote) & !is.na(budget.good.lc5.chair.comp$budget.treat) & !is.na(budget.good.lc5.chair.comp$lc5.chair.intent))

consort$control_analyzed_councillor=sum(budget.bad.lc5.councillor.comp$budget.treat==0 & !is.na(budget.bad.lc5.councillor.comp$lc5.councillor.inc.vote) & !is.na(budget.bad.lc5.councillor.comp$budget.treat) & !is.na(budget.bad.lc5.councillor.comp$lc5.councillor.intent))+
  sum(budget.good.lc5.councillor.comp$budget.treat==0 & !is.na(budget.good.lc5.councillor.comp$lc5.councillor.inc.vote) & !is.na(budget.good.lc5.councillor.comp$budget.treat) & !is.na(budget.good.lc5.councillor.comp$lc5.councillor.intent) )

#reasons for exclusion
#Same counts as for the treatment arm, restricted to control rows surveyed at endline
consort$control_refused_voting=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & (!(consort.data$d.vote_in_LC5_election.x %in% c("no","yes")) & !is.na(consort.data$d.vote_in_LC5_election.x)))
consort$control_refused_vote_choice_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & ((consort.data$d.party_of_LC5_chairperson_voted.x %in% c("refused_to_answer")) ))
consort$control_refused_vote_choice_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & ((consort.data$d.party_of_LC5_councillor_voted.x %in% c("refused_to_answer")) ))
consort$control_refused_vote_intent_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & ((consort.data$b.Q6_vote_for_current_LC5chair %in% c("refused_to_ans")) ))
consort$control_refused_vote_intent_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & ((consort.data$b.Q7_vote_for_current_LC3council %in% c("refused_to_ans")) ))
consort$control_prior_nonresponsive=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & ((consort.data$b.Q22_LC5_record_of_budget_mgmt %in% c("", NA)) ))
consort$control_chair_uncompetitive=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & consort.data$lc5.chair.competitive==0)
consort$control_councillor_uncompetitive=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & (consort.data$lc5.councillor.competitive==FALSE | is.na(consort.data$lc5.councillor.competitive)))
consort$control_changed_parties_chair=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & consort.data$lc5.chair.party.switch==1)
consort$control_changed_parties_councillor=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & consort.data$lc5.councillor.party.switch==1)
consort$control_did_not_vote=sum(consort.data$d.attr==0 & consort.data$budget.treat==0 & (consort.data$d.vote_in_LC5_election.x=="no" & !is.na(consort.data$d.vote_in_LC5_election.x)))


#Print every count as "name: value", skipping the placeholder first column
for(i in c(2:length(consort))){
  cat(paste(colnames(consort)[i],": ", consort[1,i], "\n", sep=""))
}

#Release the working copy of the data
consort.data=NULL


######################################################################################
###Figure S4: Comparison of incumbent vote share, pre- and post-election results being announced by politician
######################################################################################

#Parse the endline timestamps stored as text; strptime() returns POSIXlt values and,
#as no tz is given, reads them in the session time zone
data$end.y <- strptime(data$end.y, "%Y-%m-%d %H:%M:%S")
data$start.y <- strptime(data$start.y, "%Y-%m-%d %H:%M:%S")
data$X_submission_time.y <- strptime(data$X_submission_time.y, "%Y-%m-%dT%H:%M:%S")

#before.results is 1 when the survey ended on 2016-02-25 before 17:00, or ended before
#2016-02-25 and was submitted before 17:00 that day; 0 otherwise (NA when the needed
#timestamps are missing)
#NOTE(review): the "PST" suffix in the cut-off strings does not appear to be parsed as a
#time zone when the strings are converted for comparison, so the cut-offs would also be
#read in the session time zone - confirm the intended zone
data$before.results <- ifelse((data$end.y < "2016-02-25 17:00 PST" & data$end.y > "2016-02-25 00:00 PST") | (data$end.y < "2016-02-25 00:00 PST" & data$X_submission_time.y < "2016-02-25 17:00 PST"), 1, 0)

###Creating chairs dataframe and plot
#One row per competitive LC V chair, with the number of respondents who reported voting
#for the incumbent and the number with a recorded vote, split by before.results
name <- as.character(unique(data$lc5.chair.name))
table(data$lc5.chair.name,data$lc5.chair.competitive) #To find uncompetitive chairs to remove
chair.not.competitive <- c("bazanye, milton mutabazi","birungi, norman k. b.","kanaku, michael","orot, ismael")
chairs <- data.frame(name[!(name %in% chair.not.competitive)])
names(chairs)[1] <- "name"

for (i in seq_len(nrow(chairs))){
  sub <- subset(data, lc5.chair.name==chairs$name[i])
  #%in% never yields NA, so a respondent whose before.results is missing is left out of
  #both periods instead of being counted in each of them
  chairs$vote.before[i] <- sum(sub$lc5.chair.inc.vote %in% 1 & sub$before.results %in% 1)
  chairs$n.before[i] <- sum(!is.na(sub$lc5.chair.inc.vote) & sub$before.results %in% 1)
  chairs$vote.after[i] <- sum(sub$lc5.chair.inc.vote %in% 1 & sub$before.results %in% 0)
  chairs$n.after[i] <- sum(!is.na(sub$lc5.chair.inc.vote) & sub$before.results %in% 0)
}

#Keep chairs observed in both periods (same rule as for councillors); a zero n.after
#would give 0/0 = NaN below and make the correlation NA
chairs <- subset(chairs, n.before>0 & n.after>0)
chairs$percent.before <- chairs$vote.before*100/chairs$n.before
chairs$percent.after <- chairs$vote.after*100/chairs$n.after

chair.ba <- ggplot(chairs, aes(x=percent.after, y=percent.before)) +
  geom_point(size=2, shape=20) + theme_bw() + xlab("% Vote for Incumbent (Post-Results)") +
  ylab("% Vote for Incumbent (Pre-Results)") + coord_cartesian(xlim = c(0,100),ylim = c(0,100)) +
  ggtitle("LC V Chairs") + geom_smooth(method=lm, se=FALSE, color="black")

cor(chairs$percent.before,chairs$percent.after)

##Creating councillor dataframe and plot
#Cross-tabulate councillor name against the competitive flag and keep names that have
#no rows in the first column of the table and at least one row in the second
tab<-table(data$lc5.councillor.name,data$lc5.councillor.competitive) #To find uncompetitive councillors to remove
tab0<-data.matrix(tab[,1])
tab1<-data.matrix(tab[,2])
tab.dta <- data.frame(tab0,tab1)
tab.dta <- subset(tab.dta, tab0==0 & tab1>0)

councillors <- data.frame(row.names(tab.dta))
names(councillors)[1] <- "name"

for (i in seq_len(nrow(councillors))){
  sub <- subset(data, lc5.councillor.name==councillors$name[i])
  #%in% never yields NA, so a respondent whose before.results is missing is left out of
  #both periods instead of being counted in each of them
  councillors$vote.before[i] <- sum(sub$lc5.councillor.inc.vote %in% 1 & sub$before.results %in% 1)
  councillors$n.before[i] <- sum(!is.na(sub$lc5.councillor.inc.vote) & sub$before.results %in% 1)
  councillors$vote.after[i] <- sum(sub$lc5.councillor.inc.vote %in% 1 & sub$before.results %in% 0)
  councillors$n.after[i] <- sum(!is.na(sub$lc5.councillor.inc.vote) & sub$before.results %in% 0)
}

#Keep councillors observed in both periods so that both percentages are defined
councillors <- subset(councillors, n.before>0 & n.after>0)
councillors$percent.before <- councillors$vote.before*100/councillors$n.before
councillors$percent.after <- councillors$vote.after*100/councillors$n.after

councillor.ba <- ggplot(councillors, aes(x=percent.after, y=percent.before)) +
  geom_point(size=2, shape=20) + theme_bw() + xlab("% Vote for Incumbent (Post-Results)") +
  ylab("% Vote for Incumbent (Pre-Results)") + coord_cartesian(xlim = c(0,100),ylim = c(0,100)) +
  geom_smooth(method=lm, se=FALSE, color="black") + ggtitle("LC V Councillors")

cor(councillors$percent.before,councillors$percent.after)

##Plotting together
#pdf("before-after_180424.pdf", width=6.5, height=4)
multiplot(chair.ba,councillor.ba, cols=2)
#dev.off()


######################################################################################
###Figure S5: Comparing self-reported vote choice data from our study to official returns
######################################################################################

#Keep only rows with more than one vote
votes.councillor <- votes.councillor[votes.councillor$number_votes>1,]
votes.chair <- votes.chair[votes.chair$number_votes>1,]

#Multiply the incumbent shares by 100 so the axes read as percentages
votes.councillor$incumbent_share_official <- votes.councillor$incumbent_share_official*100
votes.councillor$incumbent_share_survey <- votes.councillor$incumbent_share_survey*100
votes.chair$incumbent_share_official <- votes.chair$incumbent_share_official*100
votes.chair$incumbent_share_survey <- votes.chair$incumbent_share_survey*100

#Scatter of official against survey incumbent share with a fitted line;
#the two panels differ only in their data and title
.survey.vs.official.plot <- function(votes, plot.title) {
  ggplot(votes, aes(incumbent_share_survey, incumbent_share_official))+
    geom_point(alpha = 20/20, position=position_jitter(width=0,height=0))+
    scale_colour_manual(name="",  values =c("black"))+
    geom_smooth(method=lm, se=FALSE, fullrange=TRUE, colour="black")+
    scale_fill_brewer(guide = guide_legend(title = ""))+
    labs(x = "% Votes for Incumbent (Survey)",
         y=  "% Votes for Incumbent (Official)")+
    ylim(0,100)+
    theme_bw()+
    ggtitle(plot.title)
}

plot.councillors <- .survey.vs.official.plot(votes.councillor, "LC V Councillors")
plot.chairs <- .survey.vs.official.plot(votes.chair, "LC V Chairs")

#jpeg("./figures/FigureS5.jpg",width=700,height=400, quality=100)
multiplot(plotlist=list(plot.chairs, plot.councillors), cols=2)
#dev.off()


######################################################################################
###Figure S6: Share of respondents in each district able to name the water basin color at their polling place
######################################################################################

#Exclude cases that were non responsive or missing
#NOTE(review): each filter uses "!=", so a row whose answer is NA would come through as an
#all-NA row; such rows look to be dropped later by the merge on village and the
#is.na(district.x) filters - confirm
data.temp=data[data$d.color_of_water_basin.x !="don_t_remember",]
data.temp=data.temp[data.temp$d.color_of_water_basin.x !="refused_to_ans",]
data.temp=data.temp[data.temp$d.color_of_water_basin.x !="another_object",]
data.temp=data.temp[data.temp$d.color_of_water_basin.x !="",]
#Basin colour as a factor and as its integer code, so colours can be compared as numbers
data.temp$basin_color=factor(data.temp$d.color_of_water_basin.x)
data.temp$basin_color_int = as.integer(data.temp$basin_color)
#Good-news / bad-news indicators, using the same conditions as the budget.good and budget.bad subsets
data.temp$goodnews = ifelse(data.temp$budget.actual>data.temp$budget.prior | (data.temp$budget.actual==data.temp$budget.prior & data.temp$budget.actual>=4), 1, 0)
data.temp$badnews = ifelse(data.temp$budget.actual<data.temp$budget.prior | (data.temp$budget.actual==data.temp$budget.prior & data.temp$budget.actual<=2), 1, 0)

#Slim copy holding only the columns needed below
data.simple=data.frame(badnews=data.temp$badnews, goodnews=data.temp$goodnews, treat=data.temp$budget.treat, village=data.temp$location.id, district=data.temp$district_lower, basin_color_int=data.temp$basin_color_int, count=rep_len(1,nrow(data.temp)))

#check whether respondent color choices equal the modal choice in a village
#aggregate() applies SingleMode to every column within each village; after the merge,
#".x" columns are the respondent's own values and ".y" columns are the village modes
data.aggregate=aggregate(data.simple, by=list(village_name=data.simple$village), FUN=SingleMode)
data.merge=merge(data.simple, data.aggregate, by="village")
data.merge$count=1
#correct is TRUE when the respondent's colour equals the village mode, FALSE when it
#differs, and stays NA when the comparison itself is NA
data.merge$correct = NA
data.merge$correct = ifelse(data.merge$basin_color_int.x==data.merge$basin_color_int.y, TRUE, data.merge$correct)
data.merge$correct = ifelse(data.merge$basin_color_int.x!=data.merge$basin_color_int.y, FALSE, data.merge$correct)

#Treatment group: share matching the modal colour, overall and by bad/good news
#NOTE(review): the overall mean/sd are computed without na.rm while the subgroup ones use na.rm = TRUE - confirm
data.treat=data.merge[data.merge$treat.x==1,]
data.treat=data.treat[!is.na(data.treat$district.x),]
m.treat=round(mean(data.treat$correct),3)
sd.treat=round(sd(data.treat$correct),3)

m.treat.bad=round(mean(data.treat[data.treat$badnews.x==1,]$correct, na.rm = TRUE),3)
sd.treat.bad=round(sd(data.treat[data.treat$badnews.x==1,]$correct, na.rm = TRUE),3)
m.treat.good=round(mean(data.treat[data.treat$goodnews.x==1,]$correct, na.rm = TRUE),3)
sd.treat.good=round(sd(data.treat[data.treat$goodnews.x==1,]$correct, na.rm = TRUE),3)

#Control group: the same summaries
data.control=data.merge[data.merge$treat.x==0,]
data.control=data.control[!is.na(data.control$district.x),]
m.control=round(mean(data.control$correct),3)
sd.control=round(sd(data.control$correct),3)

m.control.bad=round(mean(data.control[data.control$badnews.x==1,]$correct, na.rm = TRUE),3)
sd.control.bad=round(sd(data.control[data.control$badnews.x==1,]$correct, na.rm = TRUE),3)
m.control.good=round(mean(data.control[data.control$goodnews.x==1,]$correct, na.rm = TRUE),3)
sd.control.good=round(sd(data.control[data.control$goodnews.x==1,]$correct, na.rm = TRUE),3)

treatplot=ggplot(data.treat, aes(reorder(factor(data.treat$district.x), data.treat$count, sum), fill=factor(correct))) +
  geom_bar()+
  theme_bw()+
  scale_fill_manual(guide = guide_legend(title = "Same as Modal \nResponse",
                                         keywidth=3,
                                         keyheight=3),
                    values=c("#d0d3d4","#000000"),
                    labels = c("False", "True", "")
  )+
  theme(axis.text = element_text(size = 24, angle = 90,hjust = 1),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size=25),
        legend.text=element_text(size = 22),
        legend.title=element_text(size = 22),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
        
  )+
  ylab("Number of Respondents")+
  scale_y_continuous(expand=c(0.02,0), limits = c(0, 500))+
  annotate(geom="text", x=12, y=450, label="Treatment Group", size=15,
           color="black")+
  annotate(geom="text", x=12, y=410, label=paste("mu==",m.treat), size=12,
           color="black", parse=TRUE)+
  annotate(geom="text", x=12, y=380, label=paste("mu['+']==",m.treat.good), size=12,
           color="black", parse=TRUE)+
  annotate(geom="text", x=12, y=350, label=paste("mu['-']==",m.treat.bad), size=12,
           color="black", parse=TRUE)



controlplot=ggplot(data.control, aes(reorder(factor(data.control$district.x), data.control$count, sum), fill=factor(correct))) +
  geom_bar()+
  theme_bw()+
  scale_fill_manual(guide = guide_legend(title = "Same as Modal \nResponse",
                                         keywidth=3,
                                         keyheight=3),
                    values=c("#d0d3d4","#000000"),
                    labels = c("False", "True", "")
  )+
  theme(axis.text = element_text(size = 24, angle = 90,hjust = 1),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size=25),
        legend.text=element_text(size = 22),
        legend.title=element_text(size = 22),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
        
  )+
  ylab("Number of Respondents")+
  scale_y_continuous(expand=c(0.02,0), limits = c(0, 500))+
  annotate(geom="text", x=12, y=450, label="Control Group", size=15,
           color="black")+
  annotate(geom="text", x=12, y=410, label=paste("mu==",m.control), size=12,
           color="black", parse=TRUE)+
  annotate(geom="text", x=12, y=380, label=paste("mu['+']==",m.control.good), size=12,
           color="black", parse=TRUE)+
  annotate(geom="text", x=12, y=350, label=paste("mu['-']==",m.control.bad), size=12,
           color="black", parse=TRUE)

# Figure S6: treatment and control panels side by side. Uncomment the
# jpeg()/dev.off() pair to write the figure to disk.
#jpeg("./Figures/FigureS6.jpg",width=2000,height=1000, quality=100)
multiplot(treatplot, controlplot, cols = 2)
#dev.off()

# Drop the intermediate objects only after the figure has been drawn.
data.simple <- data.temp <- data.merge <- data.treat <- data.control <- NULL


######################################################################################
###Figure S7: Descriptive data for voters' priors about budget management at the district level
######################################################################################

#jpeg("./Figures/FigureS7.jpg",width=500,height=500, quality=100)
# Bar chart of f.budget.prior, skipping rows where it is missing.
ggplot(subset(data, !is.na(f.budget.prior)), aes(x = f.budget.prior)) +
  geom_bar(fill = "black", position = "dodge") +
  labs(x = "Respondent Prior Beliefs about Budget Performance",
       y = "Number of Responses") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

#dev.off()


######################################################################################
###Figure S8: Distribution of audit performance scores for budget management disseminated as part of treatment
######################################################################################

#jpeg("./Figures/FigureS8.jpg",width=500,height=500, quality=100)

# Bar chart of f.budget.actual, skipping rows where it is missing.
ggplot(subset(data, !is.na(f.budget.actual)), aes(x = f.budget.actual)) +
  geom_bar(fill = "black", position = "dodge") +
  labs(x = "Audit Budget Performance", y = "Number of Responses") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

#dev.off()


######################################################################################
###Figure S9: Difference between audit and voters' prior budget performance scores
######################################################################################

# Gap between the audit score and the respondent's belief, measured against
# the prior and the post-treatment belief.
data$budget.diff.prior <- data$budget.actual - data$budget.prior
data$budget.diff.post <- data$budget.actual - data$budget.post
# worseaudit is 1 when the audit score is below 3, 0 otherwise (NA stays NA).
data$worseaudit <- ifelse(data$budget.actual < 3, 1, 0)

# Count respondents in every (score gap, worseaudit, treatment arm) cell:
# countme is 1 per row, so its sum within a cell is the cell size.
data.temp <- data[, c("budget.diff.prior", "worseaudit", "budget.treat")]
data.temp$countme <- 1
data.temp <- aggregate(
  data.temp,
  by = list(budget.diff.prior.x = data.temp$budget.diff.prior,
            worse.x = data.temp$worseaudit,
            budget.treat.x = data.temp$budget.treat),
  FUN = sum
)
# NOTE(review): factor() gives the first label to the smallest value, so
# worse.x == 0 (audit score >= 3) is shown as "Worse Audit\n(Audit Score<3)".
# Confirm against the coding of budget.actual that this is intended.
data.temp$worse.x <- factor(
  data.temp$worse.x,
  labels = c("Worse Audit\n(Audit Score<3)", "Better Audit\n(Audit Score>3)")
)

cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# Treatment-arm panel; its legend is suppressed because the control panel
# carries the shared one.
treat.plot <- ggplot(subset(data.temp, budget.treat.x == 1),
                     aes(x = budget.diff.prior.x, y = countme, fill = worse.x)) +
  geom_col() +
  scale_x_continuous(limits = c(-4.5, 4.5), breaks = seq(-4, 4, by = 1)) +
  scale_fill_manual(values = cbPalette) +
  guides(fill = FALSE) +
  labs(x = "Audit Score - Prior Belief", y = "Number of Responses",
       title = "Treatment Group (N=7,955)") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12)
  )

# Control-arm panel, including the legend for both panels.
control.plot <- ggplot(subset(data.temp, budget.treat.x == 0),
                       aes(x = budget.diff.prior.x, y = countme, fill = worse.x)) +
  geom_col() +
  scale_x_continuous(limits = c(-4.5, 4.5), breaks = seq(-4, 4, by = 1)) +
  scale_fill_manual(values = cbPalette) +
  guides(fill = guide_legend(title = "", keywidth = 0.3, keyheight = 0.6,
                             default.unit = "inch")) +
  labs(x = "Audit Score - Prior Belief", y = "Number of Responses",
       title = "Control Group (N=7,930)") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    legend.text = element_text(size = 16)
  )

#jpeg("./Figures/FigureS9.jpg",width=800,height=500, quality=100)

# The control panel is given more width to make room for its legend.
grid.arrange(treat.plot, control.plot, ncol = 2, widths = c(660, 910))

#dev.off()

data.temp <- NULL


######################################################################################
###Figure S10: Balance of individual-level pre-treatment covariates at baseline
######################################################################################

# Mirrored bar chart of one covariate by treatment status.
#   dta:   data frame holding the covariate and the treatment column
#   var:   name (string) of the covariate to tabulate
#   treat: name (string) of the treatment column; rows equal to 1 are drawn
#          as positive counts, rows equal to 0 as negative counts
#   title: plot title
# Returns the ggplot object.
balance.plot <- function(dta, var, treat, title){
  is.treated <- dta[, treat] == 1
  is.control <- dta[, treat] == 0
  count.breaks <- seq(-2000, 2000, 2000)

  ggplot(data = dta, aes_string(x = var, fill = treat)) +
    geom_bar(data = dta[is.treated, ]) +
    # Control counts are negated so they extend in the opposite direction.
    geom_bar(data = dta[is.control, ], aes(y = ..count.. * (-1))) +
    # Label the mirrored axis with absolute counts.
    scale_y_continuous(breaks = count.breaks, labels = abs(count.breaks)) +
    coord_flip() +
    ggtitle(title) +
    theme(
      legend.position = "none",
      plot.title = element_text(hjust = 0.5),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_text(size = 7)
    )
}

###Cleaning the data and levels
# Requires the "data" and budget setup blocks of VoteChoice_Analysis.R to have
# been run first; some of these variables were already cleaned and ordered there.
# Column 1 is the treatment indicator, columns 2-27 are the covariates that get
# plotted, and the last column (d.attr) is used to subset for Figure S11.
covs0 <- data[, c(
  "budget.treat",
  "b.Q1_living_conditions", "lc5.councillor.same.tribe",
  "lc5.turnout.intent", "lc5.councillor.intent", "lc5.chair.intent",
  "partyID", "party.attachment",
  "b.Q11_2011_Vote_for_LC5councilor", "b.Q12_2011_vote_for_LC5chair",
  "b.vote.buying",
  "budget.importance", "ps.importance",
  "b.Q18_Most_important_public_serv", "b.Q19_Second_most_important_publ",
  "b.Q21a_Trust_in_politicians_for_", "b.trust.Twaweza",
  "b.Q21c_Trust_in_NGOs_for_info", "b.trust.AG",
  "b.Q22_LC5_record_of_budget_mgmt", "budget.certainty",
  "b.Q24_Powerful_ppl_learning_how_", "b.Q25_Will_counting_votes_be_fai",
  "r.What_is_your_gender", "r.education",
  "r.In_which_language_would_you_pr", "r.What_is_your_age",
  "d.attr"
)]
#Note: some of these variables have already been cleaned and ordered in "data" setup block and budget setup block

##Plot titles, one per column of covs0 (same order; the trailing d.attr column has no title)
#names(covs0)
var.names <- c(
  "budget.treat",
  "Living Conditions", "Same Tribe Councillor",
  "Turnout Intent", "Councillor Vote Intent", "Chair Vote Intent",
  "Party", "Party Attachment",
  "Voted for Councillor 2011", "Voted for Chair 2011",
  "Vote Buying",
  "Budget Importance", "Services Importance",
  "Most Important Service", "Second Important Service",
  "Trust Local Politicians", "Trust Twaweza", "Trust NGOs", "Trust Auditor General",
  "Budget Prior", "Budget Prior Certainty",
  "Voting Secret", "Vote Count Fair",
  "Gender", "Education", "Language", "Age"
)

##Changing labels within variables
# Recodes the raw survey codes in covs0 into the display labels used on the
# balance plots. The statements are order-dependent and should be run top to
# bottom: replacements of the form covs0[covs0=="..."] act on EVERY column of
# covs0 at once, not only on the column tabulated just above them.
# The bare table() calls only display the current values of a column (they
# print when run interactively) and do not change covs0.
#
# Columns 2-26 are converted to character through apply(), which turns the
# data frame into a character matrix first.
# NOTE(review): that conversion presumably pads numeric values to a common
# width, which would explain why the recodes below match " 1" and " 0" with a
# leading space -- confirm before replacing apply() with a per-column conversion.
cols <- 2:26
covs0[,cols] = apply(covs0[,cols], 2, function(x) (as.character(x)))

table(covs0[,2], useNA="always")
covs0[covs0=="don_t_know"] <- "don't know"
covs0[covs0=="much_better"] <- "much better"
covs0[covs0=="much_worse"] <- "much worse"
covs0[covs0=="refused_to_ans"] <- "refused"
covs0[covs0=="the_same"] <- "same"
# Columns 3-6: 0/1 indicators recoded to no/yes, with NA shown as "missing".
table(covs0[,3], useNA="always")
covs0[,3] <- ifelse(is.na(covs0[,3]),"missing",covs0[,3])
covs0[,3] <- ifelse(covs0[,3]==" 1","yes",covs0[,3])
covs0[,3] <- ifelse(covs0[,3]==" 0","no",covs0[,3])
table(covs0[,4], useNA="always")
covs0[,4] <- ifelse(covs0[,4]==" 1","yes",covs0[,4])
covs0[,4] <- ifelse(covs0[,4]==" 0","no",covs0[,4])
covs0[,4] <- ifelse(is.na(covs0[,4]),"missing",covs0[,4])
table(covs0[,5], useNA="always")
covs0[,5] <- ifelse(covs0[,5]==" 1","yes",covs0[,5])
covs0[,5] <- ifelse(covs0[,5]==" 0","no",covs0[,5])
covs0[,5] <- ifelse(is.na(covs0[,5]),"missing",covs0[,5])
table(covs0[,6], useNA="always")
covs0[,6] <- ifelse(covs0[,6]==" 1","yes",covs0[,6])
covs0[,6] <- ifelse(covs0[,6]==" 0","no",covs0[,6])
covs0[,6] <- ifelse(is.na(covs0[,6]),"missing",covs0[,6])
# Column 7 (partyID)
table(covs0[,7], useNA="always")
covs0[,7] <- ifelse(covs0[,7]=="other-noincumbentpartymatch","other",covs0[,7])
covs0[,7] <- ifelse(is.na(covs0[,7]),"missing",covs0[,7])
# Column 8 (party.attachment): annotate the end points of the 1-7 scale.
table(covs0[,8], useNA="always")
covs0[,8] <- ifelse(covs0[,8]=="1","1 (very little)",covs0[,8])
covs0[,8] <- ifelse(covs0[,8]=="7","7 (very much)",covs0[,8])
covs0[,8] <- ifelse(covs0[,8]=="not_applicable","no party",covs0[,8])
table(covs0[,9], useNA="always")
covs0[covs0=="i_did_not_vote"] <- "didn't vote"
# Empty strings anywhere in covs0 are shown as "missing".
covs0[covs0==""] <- "missing"
table(covs0[,10], useNA="always")
table(covs0[,11], useNA="always")
covs0[covs0=="somewhat_likel"] <- "somewhat likely"
covs0[covs0=="somewhat_unlik"] <- "somewhat unlikely"
covs0[covs0=="very_likel"] <- "very likely"
covs0[covs0=="very_unlikely"] <- "very unlikely"
table(covs0[,12], useNA="always")
covs0[covs0=="not_important"] <- "not important"
covs0[covs0=="not_very_impor"] <- "not very important"
covs0[covs0=="somewhat_impor"] <- "somewhat important"
covs0[covs0=="very_important"] <- "very important"
table(covs0[,13], useNA="always")
# From here on every remaining NA in any column of covs0 is the string "missing".
covs0[is.na(covs0)] <- "missing"
table(covs0[,14], useNA="always")
covs0[covs0=="local_health_services"] <- "health"
covs0[covs0=="local_roads"] <- "roads"
covs0[covs0=="primary_schools"] <- "schools"
covs0[covs0=="water_access"] <- "water"
covs0[covs0=="refused_to_answer"] <- "refused"
table(covs0[,15], useNA="always")
covs0[covs0=="local_health_s"] <- "health"
covs0[covs0=="primary_school"] <- "schools"
table(covs0[,16], useNA="always")
covs0[covs0=="do_not_trust_a"] <- "don't trust at all"
covs0[covs0=="option_5"] <- "refused"
covs0[covs0=="trust_a_little"] <- "trust a little"
covs0[covs0=="trust_a_lot"] <- "trust a lot"
table(covs0[,17], useNA="always")
table(covs0[,18], useNA="always")
table(covs0[,19], useNA="always")
table(covs0[,20], useNA="always")
covs0[covs0=="a_little_worse"] <- "a little worse"
table(covs0[,21], useNA="always")
covs0[covs0=="not_certain"] <- "not certain"
covs0[covs0=="very_certain"] <- "very certain"
covs0[covs0=="very_uncertain"] <- "very uncertain"
table(covs0[,22], useNA="always")
covs0[covs0=="not_at_all_lik"] <- "not at all likely"
covs0[covs0=="not_very_likel"] <- "not very likely"
covs0[covs0=="very_likely"] <- "very likely"
table(covs0[,23], useNA="always")
table(covs0[,24], useNA="always")
table(covs0[,25], useNA="always")
covs0[covs0=="completed_mast"] <- "complete graduate"
covs0[covs0=="completed_prim"] <- "complete primary"
covs0[covs0=="completed_seco"] <- "complete secondary"
covs0[covs0=="completed_univ"] <- "complete university"
covs0[covs0=="no_schooling"] <- "no schooling"
covs0[covs0=="refuse_to_answ"] <- "refused"
covs0[covs0=="some_post_grad"] <- "some graduate"
covs0[covs0=="some_primary_s"] <- "some primary"
covs0[covs0=="some_secondary"] <- "some secondary"
covs0[covs0=="some_universit"] <- "some university"
# Column 26 (language): keep the five named languages and pool everything else
# (including "missing") into "other".
table(covs0[,26], useNA="always") #Rename these in letters
covs0[,26] <- ifelse(covs0[,26]!="luganda" & covs0[,26]!="english" & covs0[,26]!="runyankole" & covs0[,26]!="langi" & covs0[,26]!="ateso", "other", covs0[,26])
covs0[covs0=="AUTOMATIC"] <- "other"
table(covs0[,27], useNA="always")

##Creating factors with appropriate levels
# Each recoded covariate becomes a factor with an explicit level order.
# Columns that share a response scale are converted together; any value not
# listed in `levels` becomes NA.
table(covs0[,2], useNA="always")
covs0[,2] <- factor(covs0[,2],
                    levels = c("refused", "missing", "don't know", "much worse",
                               "worse", "same", "better", "much better"))

sort(table(covs0[,7]))
covs0[,7] <- factor(covs0[,7],
                    levels = c("missing", "ufa", "other", "upc", "dp", "fdc",
                               "independent", "nrm"))

covs0[,8] <- factor(covs0[,8],
                    levels = c("missing", "no party", "1 (very little)", "2", "3",
                               "4", "5", "6", "7 (very much)"))

# 2011 vote for councillor / chair
covs0[9:10] <- lapply(covs0[9:10], factor,
                      levels = c("missing", "refused", "don't know",
                                 "didn't vote", "no", "yes"))

covs0[,11] <- factor(covs0[,11],
                     levels = c("missing", "don't know", "very likely",
                                "somewhat likely", "somewhat unlikely",
                                "very unlikely"))

# Most / second most important public service
covs0[14:15] <- lapply(covs0[14:15], factor,
                       levels = c("missing", "refused", "don't know", "schools",
                                  "roads", "water", "health"))

# Four trust items on a common scale
covs0[16:19] <- lapply(covs0[16:19], factor,
                       levels = c("missing", "refused", "don't know",
                                  "don't trust at all", "trust a little",
                                  "trust a lot"))

covs0[,20] <- factor(covs0[,20],
                     levels = c("missing", "refused", "much worse",
                                "a little worse", "don't know", "better",
                                "much better"))

covs0[,21] <- factor(covs0[,21],
                     levels = c("missing", "very uncertain", "not certain",
                                "certain", "very certain"))

# Two likelihood items on a common scale
covs0[22:23] <- lapply(covs0[22:23], factor,
                       levels = c("missing", "refused", "don't know",
                                  "not at all likely", "not very likely",
                                  "somewhat likely", "very likely"))

covs0[,25] <- factor(covs0[,25],
                     levels = c("refused", "no schooling", "some primary",
                                "complete primary", "some secondary",
                                "complete secondary", "some university",
                                "complete university", "some graduate",
                                "complete graduate"))

covs0[,26] <- factor(covs0[,26],
                     levels = c("other", "ateso", "langi", "runyankole",
                                "english", "luganda"))

###Balance at baseline (before attrition)
# `leg` is a hand-built legend panel: two square markers coloured by `treat`
# with the labels "Treatment" and "Control" next to them, on an otherwise
# blank canvas. It fills the last cell of the Figure S10/S11 grids.
# df and text.df must stay defined until `leg` is drawn, because the
# aesthetics below refer to them by name.
df <- data.frame(x = rep(4, 2), y = c(6, 4), treat = c(1, 0))
text.df <- data.frame(x = rep(5, 2), y = c(6, 4), label = c("Treatment", "Control"))

leg <- ggplot(df) +
  geom_point(aes(colour = df$treat), x = df$x, y = df$y, shape = 15, size = 5) +
  xlim(0, 10) +
  ylim(0, 10) +
  geom_text(aes(x = text.df$x, y = text.df$y, label = text.df$label),
            size = 5, hjust = 0, vjust = 0.4) +
  # Strip every axis, grid, background and legend element.
  theme(
    legend.position = "none",
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_blank()
  )

#pdf("./Figures/FigureS10.pdf",height=11, width=8.5)
# Figure S10: one balance plot per covariate, arranged in three columns with
# the legend panel `leg` in the final cell.
#
# Plot columns 2 .. ncol-1 of covs0: column 1 is the treatment indicator and
# the last column is not plotted. The parentheses matter: `2:ncol(covs0)-1`
# parses as `(2:ncol(covs0)) - 1`, which starts at 1 and would build an unused
# plot of the treatment column against itself.
plot.cols <- 2:(ncol(covs0) - 1)

plot.list <- list()
for (i in plot.cols) {
  plot.list[[i]] <- balance.plot(dta = covs0, var = names(covs0)[i],
                                 treat = "budget.treat", title = var.names[i])
}

# Same layout as listing every plot by hand: covariate plots in column order,
# then the legend.
do.call(grid.arrange, c(plot.list[plot.cols], list(leg), list(ncol = 3)))

#dev.off()


######################################################################################
###Figure S11: Balance of individual-level pre-treatment covariates among subjects reached during the endline survey, after attrition from baseline
######################################################################################

#Run "Figure S10" block

# Figure S11: the same balance plots, restricted to rows with d.attr == 0.
data.end <- subset(covs0,d.attr==0)

#pdf("./Figures/FigureS11.pdf",height=11, width=8.5)
# Plot columns 2 .. ncol-1 of data.end: column 1 is the treatment indicator
# and the last column is not plotted. The parentheses matter:
# `2:ncol(data.end)-1` parses as `(2:ncol(data.end)) - 1`, which starts at 1
# and would build an unused plot of the treatment column against itself.
plot.cols <- 2:(ncol(data.end) - 1)

plot.list <- list()
for (i in plot.cols) {
  plot.list[[i]] <- balance.plot(dta = data.end, var = names(data.end)[i],
                                 treat = "budget.treat", title = var.names[i])
}

# Same layout as listing every plot by hand: covariate plots in column order,
# then the legend.
do.call(grid.arrange, c(plot.list[plot.cols], list(leg), list(ncol = 3)))

#dev.off()


######################################################################################
###Figure S12: Proportion of Flippable Election Losses in Positive News Eligible Elections
###Figure S13: Proportion of Flippable Election Wins in Negative News Eligible Elections
######################################################################################

#Estimated density of cell phones (from Pew survey)
cellphonesat <- 0.65

#Estimated consistency of priors (from within sample consistency estimates)
consistentpriors.bad <- 0.9747527
consistentpriors.good <- 0.9103768

#Sample average treatment effects: point estimate, then estimate +/- a fixed half-width
bad.sate <- 0.035
bad.sate.ub <- bad.sate + 0.016
bad.sate.lb <- bad.sate - 0.016
good.sate <- 0.028
good.sate.ub <- good.sate + 0.018
good.sate.lb <- good.sate - 0.018

#Sample complier average treatment effects, same layout
good.cate <- 0.051
good.cate.ub <- good.cate + 0.023
good.cate.lb <- good.cate - 0.023

bad.cate <- 0.056
bad.cate.ub <- bad.cate + 0.020
bad.cate.lb <- bad.cate - 0.020


#Estimated intervention effect (note caveats in text)
# Each estimate is
#   cellphonesat * (consistency * own SATE - (1 - consistency) * other SATE)
# evaluated at the point estimates, the upper bounds and the lower bounds.
# NOTE(review): the good-news estimates weight the bad-news SATE by
# (1 - consistentpriors.bad), not (1 - consistentpriors.good) -- confirm
# this asymmetry is intended.
bad.pate <- cellphonesat *
  (consistentpriors.bad * bad.sate - (1 - consistentpriors.bad) * good.sate)
good.pate <- cellphonesat *
  (consistentpriors.good * good.sate - (1 - consistentpriors.bad) * bad.sate)

bad.pate.ub <- cellphonesat *
  (consistentpriors.bad * bad.sate.ub - (1 - consistentpriors.bad) * good.sate.ub)
good.pate.ub <- cellphonesat *
  (consistentpriors.good * good.sate.ub - (1 - consistentpriors.bad) * bad.sate.ub)

bad.pate.lb <- cellphonesat *
  (consistentpriors.bad * bad.sate.lb - (1 - consistentpriors.bad) * good.sate.lb)
good.pate.lb <- cellphonesat *
  (consistentpriors.good * good.sate.lb - (1 - consistentpriors.bad) * bad.sate.lb)

#exclude unopposed candidates
candidates <- candidates[!is.na(candidates$perc), ]

#match candidates with their audit score
# Districts are matched case-insensitively; a candidate is good (bad) news when
# the district's quartile label contains "better" ("worse").
candidates$budgetcategory <- budget_audit[
  match(toupper(candidates$district), toupper(budget_audit$District_Name)),
  "Quartile"
]
candidates$goodnews <- grepl("better", candidates$budgetcategory)
candidates$badnews <- grepl("worse", candidates$budgetcategory)

#subset to incumbents in competitive elections.
# The filters run one at a time; rows with a missing margin are dropped by the
# third step, and only the first row per locid is kept.
incumbents <- candidates[candidates$incumbent == 1, ]
incumbents <- incumbents[incumbents$WINNER != "Unopposed", ]
incumbents <- incumbents[!is.na(incumbents$incumbentmargin), ]
incumbents <- incumbents[incumbents$incumbentmargin < 1, ]
incumbents <- incumbents[!duplicated(incumbents$locid), ]

# Bad news: count incumbents whose positive margin is smaller than the
# estimated effect (point estimate, upper bound, lower bound).
incumbents.bad <- incumbents[incumbents$badnews == TRUE, ]
bad.N.treat <- with(incumbents.bad, sum(incumbentmargin > 0 & incumbentmargin < bad.pate))
bad.N.treat.ub <- with(incumbents.bad, sum(incumbentmargin > 0 & incumbentmargin < bad.pate.ub))
bad.N.treat.lb <- with(incumbents.bad, sum(incumbentmargin > 0 & incumbentmargin < bad.pate.lb))
bad.N.tot <- sum(!is.na(incumbents.bad$incumbentmargin))
bad.N.perc <- bad.N.treat / bad.N.tot
bad.N.perc.ub <- bad.N.treat.ub / bad.N.tot
bad.N.perc.lb <- bad.N.treat.lb / bad.N.tot

# For plotting: margin rescaled by 100, keeping only positive margins.
incumbents.bad$incumbentmargin <- incumbents.bad$incumbentmargin * 100
incumbents.bad <- incumbents.bad[incumbents.bad$incumbentmargin > 0, ]

# Good news: count incumbents whose negative margin is smaller in magnitude
# than the estimated effect.
incumbents.good <- incumbents[incumbents$goodnews == TRUE, ]
good.N.treat <- with(incumbents.good, sum(incumbentmargin < 0 & incumbentmargin > -good.pate))
good.N.treat.ub <- with(incumbents.good, sum(incumbentmargin < 0 & incumbentmargin > -good.pate.ub))
good.N.treat.lb <- with(incumbents.good, sum(incumbentmargin < 0 & incumbentmargin > -good.pate.lb))
good.N.tot <- sum(!is.na(incumbents.good$incumbentmargin))
good.N.perc <- good.N.treat / good.N.tot
good.N.perc.ub <- good.N.treat.ub / good.N.tot
good.N.perc.lb <- good.N.treat.lb / good.N.tot

# For plotting: margin rescaled by 100, keeping only negative margins.
incumbents.good$incumbentmargin <- incumbents.good$incumbentmargin * 100
incumbents.good <- incumbents.good[incumbents.good$incumbentmargin < 0, ]

# Combined share of flippable elections across both groups.
tot.perc.ub <- (good.N.treat.ub + bad.N.treat.ub) / (good.N.tot + bad.N.tot)
tot.perc.lb <- (good.N.treat.lb + bad.N.treat.lb) / (good.N.tot + bad.N.tot)

# Legend entries for Figure S12. The strings are also used as the fill values
# of the shaded areas, so the legend shows the counts.
ub.title <- paste0("Flippable Elections Upper Bound (N=", good.N.treat.ub, ")")
lb.title <- paste0("Flippable Elections Lower Bound (N=", good.N.treat.lb, ")")
m.title <- paste0("Flippable Elections Mean (N=", good.N.treat, ")")

color.lb <- "firebrick"
color.ub <- "gold2"
bgcolor <- "gray80"
#jpeg("./Figures/FigureS12.jpg",width=800,height=300, quality=100)
# Step 1: grey density of incumbent loss margins.
p <- ggplot(incumbents.good, aes(x = incumbentmargin)) +
  geom_density(data = incumbents.good, fill = bgcolor, color = bgcolor) +
  scale_x_continuous(limits = c(-95, 0), breaks = seq(-95, 0, by = 5))

# Step 2: take the computed density curve and shade the parts of it that lie
# within the upper bound, the lower bound and a +/-0.2 window around the
# point estimate.
d <- ggplot_build(p)$data[[1]]
p <- p +
  geom_area(data = subset(d, x < 0 & x > (good.pate.ub * -100)),
            aes(x = x, y = y, fill = ub.title), color = color.ub) +
  geom_area(data = subset(d, x < 0 & x > (good.pate.lb * -100)),
            aes(x = x, y = y, fill = lb.title), color = color.lb) +
  geom_area(data = subset(d, x < ((good.pate * -100) + 0.2) & x > ((good.pate * -100) - 0.2)),
            aes(x = x, y = y, fill = m.title), color = "black") +
  # Fill colours are listed as lower bound, mean, upper bound.
  scale_fill_manual("Legend title", values = c(color.lb, "black", color.ub)) +
  labs(x = "Incumbent Loss Margin (%)", y = "Density") +
  ggtitle("Number of Flippable Election Losses in Good News Eligible Districts") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 17, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 13),
    axis.text.x = element_text(hjust = 1, vjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = c(0.1, 0.9),
    legend.justification = c(0, 1)
  )

# Draw now: the title variables are reused for the next figure.
p
#dev.off()


# Legend entries for Figure S13. The strings are also used as the fill values
# of the shaded areas, so the legend shows the counts.
ub.title <- paste0("Flippable Elections Upper Bound (N=", bad.N.treat.ub, ")")
lb.title <- paste0("Flippable Elections Lower Bound (N=", bad.N.treat.lb, ")")
m.title <- paste0("Flippable Elections Mean (N=", bad.N.treat, ")")


#jpeg("./Figures/FigureS13.jpg",width=800,height=300, quality=100)
# Step 1: grey density of incumbent win margins.
p <- ggplot(incumbents.bad, aes(x = incumbentmargin)) +
  geom_density(data = incumbents.bad, fill = bgcolor, color = bgcolor) +
  scale_x_continuous(limits = c(0, 95), breaks = seq(0, 95, by = 5))

# Step 2: take the computed density curve and shade the parts of it that lie
# within the upper bound, the lower bound and a +/-0.2 window around the
# point estimate.
d <- ggplot_build(p)$data[[1]]
p <- p +
  geom_area(data = subset(d, x > 0 & x < (bad.pate.ub * 100)),
            aes(x = x, y = y, fill = ub.title), color = color.ub) +
  geom_area(data = subset(d, x > 0 & x < (bad.pate.lb * 100)),
            aes(x = x, y = y, fill = lb.title), color = color.lb) +
  geom_area(data = subset(d, x > ((bad.pate * 100) - 0.2) & x < ((bad.pate * 100) + 0.2)),
            aes(x = x, y = y, fill = m.title), color = "black") +
  # Fill colours are listed as lower bound, mean, upper bound.
  scale_fill_manual("Legend title", values = c(color.lb, "black", color.ub)) +
  labs(x = "Incumbent Win Margin (%)", y = "Density") +
  ggtitle("Number of Flippable Election Victories in Bad News Eligible Districts") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 17, face = "bold"),
    axis.title = element_text(size = 16, face = "bold"),
    axis.text = element_text(size = 13),
    axis.text.x = element_text(hjust = 1, vjust = 0.5),
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    legend.position = c(0.9, 0.9),
    legend.justification = c(1, 1)
  )
p
#dev.off()


######################################################################################
###Figure S14: Effect of treatment on changes in respondent beliefs about budget performance
######################################################################################


# Change in belief: prior minus post-treatment belief, in each sample.
budget.good$priorpostdiff <- with(budget.good, budget.prior - budget.post)
budget.bad$priorpostdiff <- with(budget.bad, budget.prior - budget.post)

#average effects
# Two-sample t-tests of priorpostdiff by treatment arm with 90% intervals.
# budget.test is re-pointed at the sample under test before each call.
budget.test <- budget.good
t1 <- t.test(budget.test$priorpostdiff~budget.test$budget.treat, conf.level=0.9)
budget.test <- budget.bad
t2 <- t.test(budget.test$priorpostdiff~budget.test$budget.treat, conf.level=0.9)
#verified recipients
# Same tests on rows where d.X_11_Over_the_last_several_days is "yes", plus a
# felm fit of the post belief on treatment and prior for each subset.
budget.test <- budget.good[budget.good$d.X_11_Over_the_last_several_days %in% "yes", ]
t3 <- t.test(budget.test$priorpostdiff~budget.test$budget.treat, conf.level=0.9)
r3 <- felm(budget.post ~ budget.treat + budget.prior | location.id | 0 | lc5.chair.name, data=budget.test)
budget.test <- budget.bad[budget.bad$d.X_11_Over_the_last_several_days %in% "yes", ]
t4 <- t.test(budget.test$priorpostdiff~budget.test$budget.treat, conf.level=0.9)
r4 <- felm(budget.post ~ budget.treat + budget.prior | location.id | 0 | lc5.chair.name, data=budget.test)


#setup vectors for plotting
# One entry per test, in the order t1..t4. The plotted point is the midpoint
# of each confidence interval; c.mean.vec holds the first group mean that
# t.test reports.
ttests <- list(t1, t2, t3, t4)
upper.vec <- unlist(lapply(ttests, function(tt) tt$conf.int[2]))
lower.vec <- unlist(lapply(ttests, function(tt) tt$conf.int[1]))
coef.vec <- unlist(lapply(ttests, function(tt) mean(tt$conf.int)))
c.mean.vec <- unlist(lapply(ttests, function(tt) tt$estimate[1]))
varnames <- c("Good News", "Bad News",
              "Good News \nVerified Recipients", "Bad News \nVerified Recipients")

# Plot window and colours; the two colours are recycled over the four rows.
xlimits <- c(-0.22, 0.22)
ylimits <- c(0, 5)
maintitle <- ""
plotcolors <- c("black", "red")

#jpeg("./Figures/FigureS14.jpg",width=900,height=700,  quality = 100)

# Figure S14: dot-and-whisker plot of the four estimates in coef.vec with the
# intervals lower.vec..upper.vec, one row per entry of varnames.
maintitle <- "Effect of Treatment on Changes in Respondent \nBeliefs about Budget Performance"
# Row positions, descending so the first estimate is drawn at the top.
y.axis <- rev(seq_along(coef.vec))
font.type <- 1
# xpd is passed as a named parameter. Nesting par(xpd=FALSE) inside this call
# hands par() an unnamed list, which triggers the warning "argument 6 does not
# name a graphical parameter".
par(mar = c(5, 20, 5, 2),
    font.main = font.type,
    font.axis = font.type,
    font.lab = font.type,
    font = font.type,
    xpd = FALSE,
    cex = 1)

# Empty-axes canvas with small points; the final points and whiskers are
# drawn on top below.
plot(coef.vec, y.axis, axes = FALSE, type = "p", ylab = "", xlab = "", pch = 19,
     cex = 0.5, xlim = xlimits, ylim = ylimits, xaxs = "r",
     main = maintitle, cex.main = 1.6)
mtext(text = "Treatment Effect", side = 1, padj = 1.9, cex = 1.8, font = (font.type - 1))
segments(lower.vec, y.axis, upper.vec, y.axis, lwd = 3, col = plotcolors)
points(coef.vec, y.axis, type = "p", pch = 19, cex = 2.3, col = plotcolors)

# x-axis with tick marks every 0.05; smaller labels, moved closer to the ticks
axis(1, at = round(seq(-.90, .90, by = 0.05), digits = 2), tick = TRUE,
     cex.axis = 1.5, mgp = c(2, .6, 0))

# y-axis with one tick per estimate; labels horizontal and close to the axis
axis(2, at = y.axis, labels = varnames, las = 1, tick = TRUE, mgp = c(2, .6, 0),
     cex.axis = 1.9, lwd = 1)

# Print the group mean stored in c.mean.vec beside each row. Iterating over
# positions rather than the values of y.axis pairs row i with mean i directly.
for (i in seq_along(y.axis)) {
  text(x = 0.20, y = y.axis[i],
       labels = substitute(bar(y[c]) == b3, list(b3 = round(c.mean.vec[i], 2))),
       cex = 1.5)
}

segments(0, -100, 0, max(ylimits) + 100, lty = 2, lwd = 1.5) # draw dotted line through 0
box(bty = "o", lwd = 1) #place box around plot


#dev.off()


